// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H
#define EIGEN_TRIANGULAR_SOLVER_VECTOR_H

namespace Eigen {

namespace internal {

    template <typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate, int StorageOrder>
    struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheRight, Mode, Conjugate, StorageOrder>
    {
        static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
        {
            triangular_solve_vector<LhsScalar,
                                    RhsScalar,
                                    Index,
                                    OnTheLeft,
                                    ((Mode & Upper) == Upper ? Lower : Upper) | (Mode & UnitDiag),
                                    Conjugate,
                                    StorageOrder == RowMajor ? ColMajor : RowMajor>::run(size, _lhs, lhsStride, rhs);
        }
    };

    // forward and backward substitution, row-major, rhs is a vector
    template <typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
    struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, RowMajor>
    {
        enum
        {
            IsLower = ((Mode & Lower) == Lower)
        };
        static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
        {
            typedef Map<const Matrix<LhsScalar, Dynamic, Dynamic, RowMajor>, 0, OuterStride<>> LhsMap;
            const LhsMap lhs(_lhs, size, size, OuterStride<>(lhsStride));

            typedef const_blas_data_mapper<LhsScalar, Index, RowMajor> LhsMapper;
            typedef const_blas_data_mapper<RhsScalar, Index, ColMajor> RhsMapper;

            typename internal::conditional<Conjugate, const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>, LhsMap>, const LhsMap&>::type cjLhs(
                lhs);
            static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
            for (Index pi = IsLower ? 0 : size; IsLower ? pi < size : pi > 0; IsLower ? pi += PanelWidth : pi -= PanelWidth)
            {
                Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);

                Index r = IsLower ? pi : size - pi;  // remaining size
                if (r > 0)
                {
                    // let's directly call the low level product function because:
                    // 1 - it is faster to compile
                    // 2 - it is slightly faster at runtime
                    Index startRow = IsLower ? pi : pi - actualPanelWidth;
                    Index startCol = IsLower ? 0 : pi;

                    general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, Conjugate, RhsScalar, RhsMapper, false>::run(
                        actualPanelWidth,
                        r,
                        LhsMapper(&lhs.coeffRef(startRow, startCol), lhsStride),
                        RhsMapper(rhs + startCol, 1),
                        rhs + startRow,
                        1,
                        RhsScalar(-1));
                }

                for (Index k = 0; k < actualPanelWidth; ++k)
                {
                    Index i = IsLower ? pi + k : pi - k - 1;
                    Index s = IsLower ? pi : i + 1;
                    if (k > 0)
                        rhs[i] -= (cjLhs.row(i).segment(s, k).transpose().cwiseProduct(Map<const Matrix<RhsScalar, Dynamic, 1>>(rhs + s, k))).sum();

                    if ((!(Mode & UnitDiag)) && numext::not_equal_strict(rhs[i], RhsScalar(0)))
                        rhs[i] /= cjLhs(i, i);
                }
            }
        }
    };

    // forward and backward substitution, column-major, rhs is a vector
    template <typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
    struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, ColMajor>
    {
        enum
        {
            IsLower = ((Mode & Lower) == Lower)
        };
        static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
        {
            typedef Map<const Matrix<LhsScalar, Dynamic, Dynamic, ColMajor>, 0, OuterStride<>> LhsMap;
            const LhsMap lhs(_lhs, size, size, OuterStride<>(lhsStride));
            typedef const_blas_data_mapper<LhsScalar, Index, ColMajor> LhsMapper;
            typedef const_blas_data_mapper<RhsScalar, Index, ColMajor> RhsMapper;
            typename internal::conditional<Conjugate, const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>, LhsMap>, const LhsMap&>::type cjLhs(
                lhs);
            static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;

            for (Index pi = IsLower ? 0 : size; IsLower ? pi < size : pi > 0; IsLower ? pi += PanelWidth : pi -= PanelWidth)
            {
                Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
                Index startBlock = IsLower ? pi : pi - actualPanelWidth;
                Index endBlock = IsLower ? pi + actualPanelWidth : 0;

                for (Index k = 0; k < actualPanelWidth; ++k)
                {
                    Index i = IsLower ? pi + k : pi - k - 1;
                    if (numext::not_equal_strict(rhs[i], RhsScalar(0)))
                    {
                        if (!(Mode & UnitDiag))
                            rhs[i] /= cjLhs.coeff(i, i);

                        Index r = actualPanelWidth - k - 1;  // remaining size
                        Index s = IsLower ? i + 1 : i - r;
                        if (r > 0)
                            Map<Matrix<RhsScalar, Dynamic, 1>>(rhs + s, r) -= rhs[i] * cjLhs.col(i).segment(s, r);
                    }
                }
                Index r = IsLower ? size - endBlock : startBlock;  // remaining size
                if (r > 0)
                {
                    // let's directly call the low level product function because:
                    // 1 - it is faster to compile
                    // 2 - it is slightly faster at runtime
                    general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, Conjugate, RhsScalar, RhsMapper, false>::run(
                        r,
                        actualPanelWidth,
                        LhsMapper(&lhs.coeffRef(endBlock, startBlock), lhsStride),
                        RhsMapper(rhs + startBlock, 1),
                        rhs + endBlock,
                        1,
                        RhsScalar(-1));
                }
            }
        }
    };

}  // end namespace internal

}  // end namespace Eigen

#endif  // EIGEN_TRIANGULAR_SOLVER_VECTOR_H
